<?php
/**
 * html dom 解析器
 */

/**
 * Thin wrapper around the Simple HTML DOM parser.
 *
 * Parses an HTML string once in the constructor, exposes selector based
 * lookups through select(), and lets entity objects extract themselves
 * from the document through fetchEntity().
 */
class Spider_DomParser {

    /**
     * Parsed document as returned by str_get_html(), or null when the
     * document could not be parsed.
     * @var object|null
     */
    private $_dom = null;

    /**
     * Charset passed to the parser as the target charset.
     * @var string
     */
    public $charset = 'UTF-8';

    /**
     * Parse the given HTML.
     *
     * str_get_html() returns false instead of a DOM object when it rejects
     * the input (for example an empty string). That case is normalised to
     * null here so the other methods can detect it instead of calling a
     * method on a boolean.
     *
     * @param string $html    HTML source to parse
     * @param string $charset target charset handed to the parser
     */
    public function __construct($html, $charset = 'UTF-8') {

        V::import('SimpleHtmlDom', 'third');

        $dom = str_get_html($html, true, true, $charset);

        $this->_dom    = is_object($dom) ? $dom : null;
        $this->charset = $charset;
    }

    /**
     * Release the parsed document.
     *
     * Guarded because the constructor may have failed to build a DOM; an
     * unguarded clear() would raise a fatal error during shutdown.
     */
    public function __destruct() {

        if (null !== $this->_dom) {
            $this->_dom->clear();
        }

        $this->_dom = null;
    }

    /**
     * Find elements matching a selector.
     *
     * @param string   $selector selector understood by the DOM's find()
     * @param int|null $index    when given, only the element at that position
     *                           is returned; otherwise all matches are returned
     * @return mixed the result of find(): a list of matches when no index is
     *               given, a single match otherwise. When no document was
     *               parsed the result is array() without an index and null
     *               with one.
     */
    public function select($selector, $index = null) {

        // No document: report "nothing found" instead of crashing.
        if (null === $this->_dom) {
            return null === $index ? array() : null;
        }

        if (null === $index) {
            return $this->_dom->find($selector);
        }

        return $this->_dom->find($selector, $index);
    }

    /**
     * Let an entity extract itself from this document.
     *
     * @param Spider_DomParser_Entity $entity entity whose parse() receives this parser
     * @return mixed whatever $entity->parse() returns
     */
    public function fetchEntity(Spider_DomParser_Entity $entity) {

        return $entity->parse($this);
    }
}
